Education For Employment (EFE) is the leading nonprofit that trains youth and links them to jobs across the Middle East and North Africa (MENA). This pivotal region is the hardest place on the planet for youth to get their first job – they are three times more likely to be unemployed than older adults.
EFE is interested in the effectiveness of their programs, particularly whether graduates find stable employment. We have data on about 7,000 participants in almost 500 program cohorts spread across 8 countries. Participants bring diverse skills, interests, and backgrounds. Programs employ a variety of training models and placement policies. How well are different programs working, and for whom?
EFE has a Salesforce database that houses all information about the organization’s programs, participants, and job placement and retention outcomes. The datasets used in this project include:
Initially, the data exports from Salesforce were pre-processed in the following ways:
The preprocessing script can be viewed on GitHub so that the deidentification steps can be reproduced with new data exports.
The contact dataset contains 8 columns that relate to when each participant obtained employment. These can be collapsed into a single column that gives the time it took for the participant to get placed, or that they were not placed or could not be reached. Below, the new composite column is on the right, and the original job placement columns can be removed.
## Clean the contacts file column names
# Columns 28-37 are renamed to five pairs, one pair per follow-up point
# (graduation, 3, 6, 9 and 12 months): "pl_<point>" and "pl_<point>_data".
# NOTE(review): the positions are hard-coded -- confirm they still match the
# layout of the current export.
cols.num <- 28:37
names(contacts)[cols.num] <- c(
  "pl_grad", "pl_grad_data",
  "pl_3", "pl_3_data",
  "pl_6", "pl_6_data",
  "pl_9", "pl_9_data",
  "pl_12", "pl_12_data"
)
# Convert every renamed column to numeric. lapply() always returns one list
# element per column, whereas sapply() collapses to a matrix or a plain vector
# depending on the number of rows, which makes the assignment fragile.
contacts[cols.num] <- lapply(contacts[cols.num], as.numeric)
## Make collapsed job placement month column and columns that indicate retention at each 3 month period after placement

# Numeric retention flag: 1 when retained, 0 when not retained and follow-up
# data is available, NA otherwise.
retention_flag <- function(retained, data_available) {
  ifelse(retained == 1, 1,
    ifelse(retained == 0 & data_available == 1, 0, NA)
  )
}

# Display label built from the same rule as retention_flag().
retention_label <- function(retained, data_available) {
  ifelse(retained == 1, "Yes",
    ifelse(retained == 0 & data_available == 1, "No", "Data Not Available")
  )
}

# The new columns are appended in this exact order because later chunks pick
# them up by column position.
contacts_raw <- contacts %>%
  mutate(
    # Earliest follow-up point at which the participant was flagged as placed.
    months_job = ifelse(pl_grad == 1, "Placed at Graduation",
      ifelse(pl_3 == 1, "Placed at 3 Months",
        ifelse(pl_6 == 1, "Placed at 6 Months",
          ifelse(pl_9 == 1, "Placed at 9 Months",
            ifelse(pl_12 == 1, "Placed at 12 Months",
              ifelse(pl_12_data == 1 & pl_12 == 0,
                "Not Placed", "Not Reached at 12 Months"
              )
            )
          )
        )
      )
    ),
    # Numeric sort key mirroring the categories of months_job.
    sorting = ifelse(pl_grad == 1, 1,
      ifelse(pl_3 == 1, 2,
        ifelse(pl_6 == 1, 3,
          ifelse(pl_9 == 1, 4,
            ifelse(pl_12 == 1, 5,
              ifelse(pl_12_data == 1 & pl_12 == 0, 6, 7)
            )
          )
        )
      )
    ),
    Retention_6_months = retention_flag(
      X6.Month.Job.Retention, X6.Month.Post.Placement.Data.Avail.YES
    ),
    `Retention at 6 Months` = retention_label(
      X6.Month.Job.Retention, X6.Month.Post.Placement.Data.Avail.YES
    ),
    Retention_3_months = retention_flag(
      X3.Month.Job.Retention, X3.Month.Post.Placement.Data.Avail.YES
    ),
    `Retention at 3 Months` = retention_label(
      X3.Month.Job.Retention, X3.Month.Post.Placement.Data.Avail.YES
    ),
    Retention_9_months = retention_flag(
      X9.Month.Job.Retention, X9.Month.Post.Placement.Data.Avail.YES
    ),
    `Retention at 9 Months` = retention_label(
      X9.Month.Job.Retention, X9.Month.Post.Placement.Data.Avail.YES
    ),
    Retention_12_months = retention_flag(
      X12.Month.Job.Retention, X12.Month.Post.Placement.Data.Avail.YES
    ),
    `Retention at 12 Months` = retention_label(
      X12.Month.Job.Retention, X12.Month.Post.Placement.Data.Avail.YES
    )
  )
## make data to display in table
# One example row per placement category, ordered by the sort key. The data is
# still grouped when the columns are selected, so dplyr keeps the grouping
# column (months_job) in the result alongside the listed flags.
months_job <- contacts_raw %>%
  group_by(months_job) %>%
  filter(row_number() == 1) %>%
  arrange(sorting) %>%
  dplyr::select(pl_grad, pl_3, pl_6, pl_9, pl_12, pl_12_data)

## create table
datatable(
  months_job,
  rownames = FALSE,
  options = list(dom = "t", pageLength = 1000, paging = FALSE),
  callback = JS(callback)
)
## Keep contacts who graduated on or before 2022-04-21, and drop those who got jobs more than 30 days before graduating from the program
# NOTE(review): the cutoff date is presumably six months before the data pull --
# confirm and update it for new exports.
# Contacts with no usable Number.Days.Until.Job.Placement value are kept.
contactsFiltered <- contacts_raw %>%
  filter(
    as.Date(EFE.Graduation.Date) <= as.Date("2022-04-21"),
    is.na(as.numeric(Number.Days.Until.Job.Placement)) |
      as.numeric(Number.Days.Until.Job.Placement) >= -30
  )
Job retention at 6 months is the initial outcome variable in the analysis. Therefore, participants who could not be reached 6 months after job placement are filtered out. Participants who graduated less than 6 months before the data was pulled, and participants who got a job more than 30 days before graduating, are also filtered out. After these steps, of the original 7124 participants in the data, only 2652 remain in the dataset that will be included in the analysis.
Overall, of the 2652 participants there is data for, 47.6% had retained employment 6 months after being placed in a job.
## create data for retention table
# Number of distinct participants in each 6-month retention category.
retentiontable <- contactsFiltered %>%
  group_by(`Retention at 6 Months`) %>%
  summarise(Participants = n_distinct(ContactID))

## create table
datatable(
  retentiontable,
  rownames = FALSE,
  options = list(dom = "t", pageLength = 1000, paging = FALSE),
  callback = JS(callback)
)
The analysis is initially interested in retention at 6 months; therefore the employment status check data will be filtered to contain only the 6 month surveys, and those survey responses can be joined to the participant contact information retaining a 1:1 relationship.
## create numerical encodings for each response option in confidence and self-efficacy surveys

# Answer scales, ordered from lowest (1) to highest (5).
confidence_levels <- c(
  "Not at all confident", "Unconfident", "Neutral", "Confident", "Very confident"
)
agreement_levels <- c(
  "Strongly disagree", "Disagree", "Neutral", "Agree", "Strongly agree"
)

# Position of each answer on the given scale (1-5). Missing answers and
# answers that are not on the scale become NA.
score_answer <- function(answer, answer_levels) {
  as.numeric(match(answer, answer_levels))
}

# Columns are added in a fixed order because later chunks select some of them
# by position.
surveys <- PrePost %>%
  mutate(
    ClassID = as.numeric(ClassID),
    ConfidenceTeam = score_answer(
      Confidence.in.Working.on.a.Team, confidence_levels
    ),
    ConfidenceCommunication = score_answer(
      Confidence.in.Communicating.w.Colleague, confidence_levels
    ),
    ConfidenceResolvingProblems = score_answer(
      Confidence.in.Resolving.Problems, confidence_levels
    ),
    ConfidenceJobMatching = score_answer(
      Confidence.in.Finding.Job.Matching.Back., confidence_levels
    ),
    ConfidencePresentingYourself = score_answer(
      Confidence.in.Presenting.Yourself.to.Em., confidence_levels
    ),
    # Composite score; NA as soon as any of the five answers is missing.
    ConfidenceTotal = ConfidenceCommunication + ConfidenceTeam +
      ConfidencePresentingYourself + ConfidenceJobMatching +
      ConfidenceResolvingProblems,
    # Any record type that does not contain "Post" is treated as a pre-survey.
    Stage = ifelse(str_detect(Survey..Record.Type, "Post"), "Post", "Pre"),
    EfficacyOptimistic = score_answer(
      Optimistic.About.my.Future, agreement_levels
    ),
    EfficacyOvercome = score_answer(
      Work.Seriously.to.Overcome.Challenge, agreement_levels
    ),
    EfficacyQualifications = score_answer(
      Necessary.Qualifications.to.Succeed, agreement_levels
    ),
    EfficacyFindaJob = score_answer(
      Can.Find.a.Job.in.the.Appropriate.Career, agreement_levels
    ),
    EfficacyFamiliarRights = score_answer(
      Familiar.With.Rights.and.Duties, agreement_levels
    ),
    # Composite score; NA as soon as any of the five answers is missing.
    EfficacyTotal = EfficacyFamiliarRights + EfficacyFindaJob +
      EfficacyQualifications + EfficacyOvercome + EfficacyOptimistic
  )
# Flatten every column to a plain vector and return a base data frame.
surveys <- as.data.frame(lapply(surveys, unlist))
## Create table of confidence scores and changes in confidence ratings
# One row per participant, with the composite confidence score before and
# after training, the absolute change and the change relative to the
# pre-training score.
Confidence <- surveys %>%
  dplyr::select(ContactID, ConfidenceTotal, Stage) %>%
  pivot_wider(names_from = Stage, values_from = ConfidenceTotal) %>%
  # Keep participants that have both a pre and a post score. Rows where either
  # comparison is NA are dropped by filter() as well.
  # NOTE(review): the "NULL" comparison presumably targets empty list cells
  # created when a participant has several surveys per stage -- confirm.
  filter(Pre != "NULL" & Post != "NULL") %>%
  mutate(
    ConfidenceChange = as.numeric(Post) - as.numeric(Pre),
    ConfidencePercent = formattable::percent(ConfidenceChange / as.numeric(Pre))
  ) %>%
  # Rename by name, not by position: pivot_wider() orders the Pre/Post columns
  # by first appearance in the data, so positions are not reliable.
  dplyr::rename(ConfidencePre = Pre, ConfidencePost = Post)
Confidence <- as.data.frame(lapply(Confidence, unlist))
## Create table of self-efficacy scores and changes in self-efficacy ratings
# One row per participant, with the composite self-efficacy score before and
# after training, the absolute change and the change relative to the
# pre-training score.
Efficacy <- surveys %>%
  dplyr::select(ContactID, EfficacyTotal, Stage) %>%
  pivot_wider(names_from = Stage, values_from = EfficacyTotal) %>%
  # Keep participants that have both a pre and a post score. Rows where either
  # comparison is NA are dropped by filter() as well.
  # NOTE(review): the "NULL" comparison presumably targets empty list cells
  # created when a participant has several surveys per stage -- confirm.
  filter(Pre != "NULL" & Post != "NULL") %>%
  mutate(
    EfficacyChange = as.numeric(Post) - as.numeric(Pre),
    EfficacyPercent = formattable::percent(EfficacyChange / as.numeric(Pre))
  ) %>%
  # Rename by name, not by position: pivot_wider() orders the Pre/Post columns
  # by first appearance in the data, so positions are not reliable.
  dplyr::rename(EfficacyPre = Pre, EfficacyPost = Post)
Efficacy <- as.data.frame(lapply(Efficacy, unlist))
## Create dataframe of most significant change answers
# Post-training surveys only; keeps column 2 plus columns 17-19 of `surveys`.
# NOTE(review): columns 17-19 are presumably the "most significant change"
# answers -- confirm the positions against the current export.
MostSignificantChange <- surveys %>%
  filter(Stage == "Post") %>%
  dplyr::select(2, 17, 18, 19)
## Join confidence, self-efficacy, and most significant change dataframes to the filtered contacts data to produce a dataframe containing the key information from each
contactsProcessed <- contactsFiltered %>%
  left_join(Confidence, by = "ContactID") %>%
  left_join(Efficacy, by = "ContactID") %>%
  left_join(MostSignificantChange, by = "ContactID")

# Participants with a known 6-month retention outcome.
contactsWithTarget <- contactsProcessed %>%
  filter(!is.na(Retention_6_months))

# Subsets of the above that also have self-efficacy change data,
# confidence change data, or a First.Change value.
contactswithEfficacy <- contactsWithTarget %>%
  filter(!is.na(EfficacyPercent))
contactswithConfidence <- contactsWithTarget %>%
  filter(!is.na(ConfidencePercent))
contactswithChange <- contactsWithTarget %>%
  filter(!is.na(First.Change))
#contactsProcessed<-as.data.frame(lapply(contactsProcessed,unlist))
The pre and post training surveys contain questions around confidence and self-efficacy that EFE is interested in looking into. There are five different questions relating to confidence, with answers on a “not at all confident” to “very confident” scale. These answers are turned into numbers so that a composite index can be created, and so that changes in confidence after participants have been through the training programs can be more easily measured.
793 of the 2652 participants that remain in the filtered dataset have pre/post survey data. A sample of the calculated confidence and self-efficacy change scores is shown below. These are each then joined to the contacts dataset.
The retention rates for participants that have data at each time interval are shown below:
## Calculate retention rates by gender
# Long format: one row per participant and follow-up point with a known
# retention outcome. Columns are selected by name so the chunk does not depend
# on the column layout of contactsFiltered.
genderLong <- contactsFiltered %>%
  dplyr::select(
    ContactID, Gender,
    Retention_6_months, Retention_3_months,
    Retention_9_months, Retention_12_months
  ) %>%
  pivot_longer(3:6, names_to = "TimeScale", values_to = "Retained") %>%
  filter(!is.na(Retained))

# Participants per gender with a known outcome at any follow-up point.
genderTotals <- genderLong %>%
  group_by(Gender) %>%
  summarise(Total = n_distinct(ContactID))

# Share of participants retained per gender and follow-up point, one column
# per follow-up point.
genderAll <- genderLong %>%
  group_by(Gender, TimeScale) %>%
  summarise(
    Retained = sum(Retained),
    Total = n_distinct(ContactID),
    .groups = "drop"
  ) %>%
  mutate(Percent = percent(Retained / Total)) %>%
  arrange(TimeScale) %>%
  dplyr::select(Gender, TimeScale, Percent) %>%
  pivot_wider(names_from = TimeScale, values_from = Percent) %>%
  # "Retention_12_months" sorts first alphabetically; move it after 9 months.
  dplyr::relocate(Retention_12_months, .after = dplyr::last_col()) %>%
  left_join(genderTotals, by = "Gender")

datatable(
  genderAll,
  rownames = FALSE,
  options = list(dom = "t", pageLength = 1000, paging = FALSE),
  callback = JS(callback)
)
The retention rates for participants that have data at each time interval are shown below:
## Calculate retention rates by country
# Long format: one row per participant and follow-up point with a known
# retention outcome. Columns are selected by name so the chunk does not depend
# on the column layout of contactsFiltered.
countryLong <- contactsFiltered %>%
  dplyr::select(
    ContactID, `Country of Programming`,
    Retention_6_months, Retention_3_months,
    Retention_9_months, Retention_12_months
  ) %>%
  pivot_longer(3:6, names_to = "TimeScale", values_to = "Retained") %>%
  filter(!is.na(Retained))

# Participants per country with a known outcome at any follow-up point.
countryTotals <- countryLong %>%
  group_by(`Country of Programming`) %>%
  summarise(Total = n_distinct(ContactID))

# Share of participants retained per country and follow-up point, one column
# per follow-up point.
country <- countryLong %>%
  group_by(`Country of Programming`, TimeScale) %>%
  summarise(
    Retained = sum(Retained),
    Total = n_distinct(ContactID),
    .groups = "drop"
  ) %>%
  mutate(Percent = percent(Retained / Total)) %>%
  arrange(TimeScale) %>%
  dplyr::select(`Country of Programming`, TimeScale, Percent) %>%
  pivot_wider(names_from = TimeScale, values_from = Percent) %>%
  # "Retention_12_months" sorts first alphabetically; move it after 9 months.
  dplyr::relocate(Retention_12_months, .after = dplyr::last_col()) %>%
  left_join(countryTotals, by = "Country of Programming")
# country<-contactsFiltered%>%
# filter(!is.na(Retention_6_months))%>%
# group_by(`Country of Programming`,`Retention at 6 Months`)%>%
# summarise(Participants=n_distinct(ContactID))%>%
# mutate(Percent=percent(Participants/sum(Participants)),
# Total=sum(Participants))%>%
# dplyr::select(1,2,4,5)%>%
# pivot_wider(names_from = `Retention at 6 Months`,values_from = Percent)
datatable(
  country,
  rownames = FALSE,
  options = list(dom = "t", pageLength = 1000, paging = FALSE),
  callback = JS(callback)
)
The retention rates for participants that have data at each time interval are shown below:
## Calculate retention rates by time of job placement
# Long format: one row per participant and follow-up point with a known
# retention outcome. Columns are selected by name so the chunk does not depend
# on the column layout of contactsFiltered.
placementLong <- contactsFiltered %>%
  dplyr::select(
    ContactID, months_job,
    Retention_6_months, Retention_3_months,
    Retention_9_months, Retention_12_months
  ) %>%
  pivot_longer(3:6, names_to = "TimeScale", values_to = "Retained") %>%
  filter(!is.na(Retained))

# Participants per placement time with a known outcome at any follow-up point.
placementTotals <- placementLong %>%
  group_by(months_job) %>%
  summarise(Total = n_distinct(ContactID))

# Share of participants retained per placement time and follow-up point, one
# column per follow-up point.
placement <- placementLong %>%
  group_by(months_job, TimeScale) %>%
  summarise(
    Retained = sum(Retained),
    Total = n_distinct(ContactID),
    .groups = "drop"
  ) %>%
  mutate(Percent = percent(Retained / Total)) %>%
  arrange(TimeScale) %>%
  dplyr::select(months_job, TimeScale, Percent) %>%
  pivot_wider(names_from = TimeScale, values_from = Percent) %>%
  # "Retention_12_months" sorts first alphabetically; move it after 9 months.
  dplyr::relocate(Retention_12_months, .after = dplyr::last_col()) %>%
  left_join(placementTotals, by = "months_job") %>%
  dplyr::rename("Placement Time" = months_job)
# placement<-contactsFiltered%>%
# filter(!is.na(Retention_6_months))%>%
# group_by(months_job,`Retention at 6 Months`)%>%
# summarise(Participants=n_distinct(ContactID),
# Sort=first(sorting))%>%
# mutate(Percent=percent(Participants/sum(Participants)),
# Total=sum(Participants))%>%
# arrange(Sort)%>%
# dplyr::select(1,2,5,6)%>%
# pivot_wider(names_from = `Retention at 6 Months`,values_from = Percent)%>%
# dplyr::rename("Placement Time"=1)
datatable(
  placement,
  rownames = FALSE,
  options = list(dom = "t", pageLength = 1000, paging = FALSE),
  callback = JS(callback)
)
The plots below use the composite confidence and self-efficacy scores to show overall changes between pre and post surveys. These plots include all participants that had pre and post survey data, not only the ones that have both pre and post survey data and 6-month job retention data.
There are actually slight negative correlations between increases in confidence and efficacy scores and retention rates:
##
## Pearson's product-moment correlation
##
## data: contactsWithTarget$EfficacyChange and contactsWithTarget$Retention_6_months
## t = -2.5096, df = 791, p-value = 0.01228
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.15752412 -0.01937959
## sample estimates:
## cor
## -0.08887926
##
## Pearson's product-moment correlation
##
## data: contactsWithTarget$ConfidenceChange and contactsWithTarget$Retention_6_months
## t = -2.1018, df = 791, p-value = 0.03589
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.143398214 -0.004928606
## sample estimates:
## cor
## -0.07452262
A categorical variable was calculated to identify those who had a job at one point, and then lost it. This was calculated by looking at each of the four time intervals (3 months, 6 months, 9 months, and 12 months), and taking all participants with a retention at that stage of “NO,” who also had a placement at any of the preceding time intervals.
## Joining ESC Surveys with contacts who are considered to have lost a job
# Only participants with a known 6-month retention outcome are considered.
contactsProcessedwithESCH <- contactsProcessed %>%
  filter(!is.na(Retention_6_months))

# A participant counts as having lost a job when a retention check is "No"
# and they were flagged as placed at one of the preceding follow-up points.
# All conditions use the element-wise `|`: the scalar `||` only inspects the
# first element (and is an error for longer vectors in R >= 4.3).
Lost_job <- contactsProcessedwithESCH %>%
  filter(
    (`Retention at 3 Months` == "No" & pl_grad == 1) |
      (`Retention at 6 Months` == "No" & (pl_grad == 1 | pl_3 == 1)) |
      (`Retention at 9 Months` == "No" &
        (pl_grad == 1 | pl_3 == 1 | pl_6 == 1)) |
      (`Retention at 12 Months` == "No" &
        (pl_grad == 1 | pl_3 == 1 | pl_6 == 1 | pl_9 == 1))
  )
Lost_job_contacts <- unique(Lost_job$ContactID)

# Post-placement status checks (3, 6, 9 and 12 months) of those participants.
Lost_job_ESC <- ESC %>%
  filter(ContactID %in% Lost_job_contacts) %>%
  filter(
    Employment.Status.Check.Type %in% c(
      "3-Month Post-Placement Status Check",
      "6-Month Post-Placement Status Check",
      "9-Month Post-Placement Status Check",
      "12-Month Post-Placement Status Check"
    )
  )
# One row per participant with the first reason given for leaving the last
# job at each post-placement status check, plus a combined ReasonsforLeaving
# column that holds the answer from the latest check that has one.
ReasonsForLeaving <- Lost_job_ESC %>%
  dplyr::select(
    ContactID, Employment.Status.Check.Type, First.Reason.Left.Last.Job
  ) %>%
  pivot_wider(
    names_from = Employment.Status.Check.Type,
    values_from = First.Reason.Left.Last.Job
  ) %>%
  # Treat the literal "NULL" as a missing answer.
  mutate(
    `3-Month Post-Placement Status Check` = ifelse(
      `3-Month Post-Placement Status Check` == "NULL",
      NA, `3-Month Post-Placement Status Check`
    ),
    `6-Month Post-Placement Status Check` = ifelse(
      `6-Month Post-Placement Status Check` == "NULL",
      NA, `6-Month Post-Placement Status Check`
    ),
    `9-Month Post-Placement Status Check` = ifelse(
      `9-Month Post-Placement Status Check` == "NULL",
      NA, `9-Month Post-Placement Status Check`
    ),
    `12-Month Post-Placement Status Check` = ifelse(
      `12-Month Post-Placement Status Check` == "NULL",
      NA, `12-Month Post-Placement Status Check`
    )
  ) %>%
  # Most recent answer wins: 12 months, then 9, then 6, then 3.
  mutate(
    ReasonsforLeaving = ifelse(
      !is.na(`12-Month Post-Placement Status Check`),
      `12-Month Post-Placement Status Check`,
      ifelse(
        !is.na(`9-Month Post-Placement Status Check`),
        `9-Month Post-Placement Status Check`,
        ifelse(
          !is.na(`6-Month Post-Placement Status Check`),
          `6-Month Post-Placement Status Check`,
          `3-Month Post-Placement Status Check`
        )
      )
    )
  ) %>%
  # Collapse each answer to the first category whose text it contains.
  # The studying pattern previously read "Focused on studyings", which cannot
  # match the label it maps to; the corrected pattern matches both spellings.
  mutate(
    ReasonsforLeaving =
      ifelse(str_detect(ReasonsforLeaving, "Position was temporary"), "Position was temporary",
      ifelse(str_detect(ReasonsforLeaving, "Benefits"), "Benefits",
      ifelse(str_detect(ReasonsforLeaving, "Salary"), "Salary",
      ifelse(str_detect(ReasonsforLeaving, "Job not sufficiently prestigious"), "Job not sufficiently prestigious",
      ifelse(str_detect(ReasonsforLeaving, "Focused on studying"), "Focused on studying",
      ifelse(str_detect(ReasonsforLeaving, "Wants to start own business"), "Wants to start own business",
      ifelse(str_detect(ReasonsforLeaving, "Dismissed from job"), "Dismissed from job",
      ifelse(str_detect(ReasonsforLeaving, "Job not matched with educatio"), "Job not matched with education",
      ifelse(str_detect(ReasonsforLeaving, "Job too far from home"), "Job too far from home",
      ifelse(str_detect(ReasonsforLeaving, "Position not appropriate"), "Position not appropriate",
      ifelse(str_detect(ReasonsforLeaving, "Other"), "Other",
      ifelse(str_detect(ReasonsforLeaving, "NA"), "NA", ReasonsforLeaving))))))))))))
  )
Reasons <- unique(ReasonsForLeaving$ReasonsforLeaving)
#unique(ESC$Employment.Status.Check.Type)
#
# ESC_not_retained<-ESC%>%
# filter(Employment.Status.Check.Type=="6-Month Post-Placement Status Check")%>%
# group_by(ContactID)%>%
# slice(which.max(Survey..Created.Date))
Using the categorization calculated above for who lost their jobs at any point, the reasons for leaving were examined. Of 1165 participants who met these criteria, 0 participants had at least one answer in an employment status check survey where they gave a reason for leaving their first job. If there were answers given in multiple employment status checks, the most recent answer was used. A sample of how this was done is below, where “ReasonsforLeaving” is the combined field using the most recent response. Responses were also cleaned to remove stray commas and nested answers.
The frequency of reasons given for leaving the first job is below, with NA and Other being the most frequent, and “Position is Temporary” and “Salary” being the top non-other responses for participants where there is data.
## Calculate counts of reasons for leaving
# Expand ReasonsforLeaving to one row per value, attach the frequency of each
# reason as `n`, and order the reason factor by that frequency (ascending).
# Only the participant id and the reason are kept.
ReasonsForLeaving <- ReasonsForLeaving %>%
  unnest(ReasonsforLeaving) %>%
  add_count(ReasonsforLeaving) %>%
  mutate(
    ReasonsforLeaving = fct_reorder(ReasonsforLeaving, n, .desc = FALSE)
  ) %>%
  dplyr::select(ContactID, ReasonsforLeaving)
## Join in reasons
# NOTE(review): if ReasonsForLeaving holds more than one row for a ContactID,
# this join repeats that participant's row -- confirm this is intended.
contactsProcessedReasons <- left_join(
  contactsProcessed,
  ReasonsForLeaving,
  by = "ContactID"
)
## Plot reasons
# Horizontal bar chart of how often each reason occurs, with the count
# printed next to each bar.
ggplot(data = ReasonsForLeaving, mapping = aes(x = ReasonsforLeaving)) +
  coord_flip() +
  geom_text(
    stat = "count",
    aes(label = ..count..),
    hjust = -.5,
    size = 2.5
  ) +
  geom_bar(fill = "#951B0D") +
  theme_minimal()
## Calculate rates of retention for different job placement types
# For each job placement type: the share of participants in each 6-month
# retention category and the number of participants of that type.
# After summarise() the data is still grouped by placement type, so the
# sums inside mutate() are taken per placement type.
participants <- contactsFiltered %>%
  filter(!is.na(Retention_6_months)) %>%
  group_by(Earliest.Job.Placement.Type, `Retention at 6 Months`) %>%
  summarise(Participants = n_distinct(ContactID)) %>%
  mutate(
    Percent = percent(Participants / sum(Participants)),
    Total = sum(Participants)
  ) %>%
  dplyr::select(
    Earliest.Job.Placement.Type, `Retention at 6 Months`, Percent, Total
  ) %>%
  pivot_wider(names_from = `Retention at 6 Months`, values_from = Percent) %>%
  dplyr::rename("Job Placement Type" = 1)

# Participants with a known 6-month retention outcome.
participantsCount <- contactsFiltered %>%
  filter(!is.na(Retention_6_months))
#n_distinct(participantsCount$ContactID[participantsCount$Retention_6_months==1])
datatable(
  participants,
  rownames = FALSE,
  options = list(dom = "t", pageLength = 1000, paging = FALSE),
  callback = JS(callback)
)
Of the 1264 participants who retained their jobs at least 6 months, the breakdown of different job types that were retained is below:
## Count job types of those retained past 6 months
# Rows flagged as retained at 6, 9 or 12 months, with the number of rows per
# job placement type attached as `n` and the type factor ordered by it.
# NOTE(review): the counts are row counts of contactsProcessedReasons, so any
# participant repeated by the reasons join is counted more than once -- confirm.
JobTypesRetained <- contactsProcessedReasons %>%
  filter(
    Retention_6_months == 1 |
      Retention_9_months == 1 |
      Retention_12_months == 1
  ) %>%
  group_by(Earliest.Job.Placement.Type) %>%
  add_count() %>%
  ungroup() %>%
  mutate(
    Earliest.Job.Placement.Type = fct_reorder(
      Earliest.Job.Placement.Type, n,
      .desc = FALSE
    )
  )
#levels(JobTypesRetained$Earliest.Job.Placement.Type)
#JobTypesRetained$Earliest.Job.Placement.Type
ggplot(data = JobTypesRetained, mapping = aes(x = Earliest.Job.Placement.Type)) +
  coord_flip() +
  geom_text(
    stat = "count",
    aes(label = ..count..),
    hjust = -.5,
    size = 2.5
  ) +
  geom_bar(fill = "#951B0D") +
  theme_minimal()
A series of models will be tested to determine which features are important in whether or not participants retained the job they were placed in after 6 months.
#
# contactsProcessed$Number.Days.Until.Job.Placement<-as.numeric(contactsProcessed$Number.Days.Until.Job.Placement)
#
# contactsProcessed$Number.Days.Retained.Job<-as.numeric(contactsProcessed$Number.Days.Retained.Job)
#
# contactsProcessed$Number.Family.Members<-as.numeric(contactsProcessed$Number.Family.Members)
#
# contactsProcessed$X..Of.Days.Continuously.Working<-as.numeric(contactsProcessed$X..Of.Days.Continuously.Working)
#
# contactsProcessed$Number.Family.Members.Working<-as.numeric(contactsProcessed$Number.Family.Members.Working)
#
# contactsProcessed$Number.of.Previous.Jobs<-as.character(contactsProcessed$Number.of.Previous.Jobs)
#
# contactsProcessed$Monthly.Family.Income<-as.numeric(contactsProcessed$Monthly.Family.Income)
#
# contactsProcessed$Job.Tenure.in.Months..Second.<-as.numeric(contactsProcessed$Job.Tenure.in.Months..Second.)
#
# contactsProcessed$Job.Tenure.in.Months..Third.<-as.numeric(contactsProcessed$Job.Tenure.in.Months..Third.)
#
# contactsProcessed$Job.Tenure.in.Months..First.<-as.numeric(contactsProcessed$Job.Tenure.in.Months..First.)
#
# contactsProcessed$First.Previous.Job.Salary<-as.numeric(contactsProcessed$First.Previous.Job.Salary)
#
# contactsProcessed$Second.Previous.Job.Salary<-as.numeric(contactsProcessed$Second.Previous.Job.Salary)
#
# contactsProcessed$Third.Previous.Job.Salary<-as.numeric(contactsProcessed$Third.Previous.Job.Salary)
#
# #
# contactsFiltered$Third.Previous.Job.Salary<-as.numeric(contactsFiltered$Third.Previous.Job.Salary)
#
#
# contactsProcessed<-contactsFiltered%>%
# left_join(Confidence,by="ContactID")%>%
# left_join(Efficacy,by="ContactID")
#
#
#
# #
# setwd("C:/Users/rcarder/Desktop")
# write.csv(contactsProcessed,"ContactsProcessed.csv",row.names = FALSE)
# #
# #
#